#
# makeCumulativeMap.R
#
# Make a map of the items used in the jointly scaled estimation
# of ANES ideology.
#
# Hill, Seth J. and Chris Tausanovitch. "A Disconnect in Representation? Comparison of Trends in Congressional and Public Polarization."
#

library(data.table)
# Estimation inputs; the jdata object used throughout this script is expected
# to be restored by this load().
load("allyearsMultinomialFull.RData")
# If the file was saved from a jagsout run, discard the memory intensive
# output object that came along with it.
if (is.element("output",ls())) rm(list="output")
# ANES cumulative file with appended extras.
# NOTE(review): presumably the source of extra.vars.50s.60s and the 2012 item
# objects referenced further down -- confirm against the .RData contents.
load("anes_cdf_2012_appended_extra_50s60s.RData")

# Add one filled dot per survey year to the current plot, all at height y.
#   yearString: years joined by "|" (e.g. "1956|1960"); only the first element
#               of the vector is used.
#   y:          vertical position shared by every dot.
#   pt.cex:     point size multiplier passed to points().
# Returns whatever points() returns (invisible NULL).
makePoints <- function(yearString,y=1,pt.cex=.8) {
  pieces <- strsplit(yearString,split="\\|")
  year.values <- as.numeric(pieces[[1]])
  heights <- rep(y,times=length(year.values))
  points(x=year.values,y=heights,pch=19,cex=pt.cex)
}

# Data.table matching yearparty to year.
# TRUE/FALSE are spelled out in this section: the load() calls above can
# restore arbitrary objects, and T/F are ordinary (reassignable) variables.
yp <- data.table(as.data.frame(jdata$codes,stringsAsFactors=FALSE)) # map from yearparty to year and party
# The as.character() round trip keeps the conversion correct even if the
# code columns arrive as factors.
yp[,yearparty := as.numeric(as.character(yearparty))]
yp[,year := as.numeric(as.character(year))]
responses <- data.table(jdata$y)
# Fix column names. In new version of jdata, variable names are in vector K.
# A leading "V1" is data.table's auto-generated name, i.e. jdata$y carried no
# column names of its own.
if (names(responses)[1] == "V1" && "K" %in% names(jdata)) {
  setnames(responses,names(jdata$K))
}
# Figure out year of each response row.
responses[,yearparty := jdata$yearparty]
responses <- merge(responses,yp,by="yearparty",all.x=TRUE)
responses[,yearparty := NULL]
warning("TODO: delete this year-dropping code for replication archive once new runs are done.\n")
# Drop 1948, 1952 and 1954 with no items/no glue.
# na.rm=TRUE: the left merge above leaves year missing for any yearparty
# without a match in yp; without it the test would be NA and if() would stop.
if (responses[,any(year < 1955,na.rm=TRUE)]) {
  cat("Dropping records for cases in 1948, 1952, and 1954...\n")
  # Rows whose year is missing are not selected by this filter either.
  responses <- responses[!(year < 1955),]
}
# Identify variables to count.
# Everything except the year/pid3 columns is treated as a survey item.
count.vars <- names(responses)[!grepl("year|pid3",names(responses))]
# Aggregate counts of non-missing cases by year.
resp.counts.yr <- responses[,lapply(.SD,function(x) sum(!is.na(x))),.SDcols=count.vars,by=c("year")]

# Build data.table combined: one row per item with at least one response,
# holding the item name (VarNumber) and the years in which it has responses
# collapsed into a "|"-separated string (sources).
survey.years <- resp.counts.yr[["year"]]
item.names <- names(resp.counts.yr)[names(resp.counts.yr) != "year"]
item.rows <- lapply(item.names,function(item) {
  n.answered <- resp.counts.yr[[item]]
  if (sum(n.answered) == 0) return(NULL) # Ignore no-response items.
  fielded <- paste(survey.years[n.answered != 0],collapse="|")
  data.table(VarNumber=item,sources=fielded)
})
item.rows <- Filter(Negate(is.null),item.rows)
# combined stays NULL when no item qualifies.
combined <- if (length(item.rows) > 0) rbindlist(item.rows) else NULL
# Create column VarLabel with variable label.
# Label tables are stacked in a fixed order (cumulative file, optional 2012
# cross section, 1950s-60s cross sections) and then left-joined onto combined.
# TRUE/FALSE are spelled out because T/F are reassignable variables.
# First set of vars from cumulative file 1952-2008.
first.vars <- data.table(read.csv("ANES_Cum_Vars_ToUse.csv",as.is=TRUE))
warning("TODO: delete this additional.2012.items code for replication archive once new runs are done.\n")
# Third set of vars from 1952-1966 cross sections.
add.1950s <- data.table(extra.vars.50s.60s)
setnames(add.1950s,c("VarNumber","VarLabel"))
add.1950s[,VarLabel := as.character(VarLabel)]
label.cols <- c("VarNumber","VarLabel")
label.sources <- list(first.vars[,label.cols,with=FALSE])
if (!is.null(additional.2012.items)) { # additional 2012 items from 2012 cross sec.
  # NOTE(review): the test is on additional.2012.items but the table used is
  # add.2012; neither is created in this script, so presumably both are
  # restored by a load() -- confirm.
  label.sources <- c(label.sources,list(add.2012[,label.cols,with=FALSE]))
} # otherwise only using 2012 items in cumulative file.
label.sources <- c(label.sources,list(add.1950s))
VarLabels <- rbindlist(label.sources)
# Left join: every item in combined is kept, labelled or not.
combined <- merge(combined,VarLabels,by="VarNumber",all.x=TRUE,all.y=FALSE)
# Mixed case words.
# Upper-cases the first character of every space-separated word in x and
# leaves the rest of each word untouched. Only the first element of x is used.
# A missing input comes back as the string "NANA" (paste0() turns the two NA
# pieces into text).
.simpleCap <- function(x) {
    words <- strsplit(x, split = " ")[[1]]
    initials <- toupper(substr(words, 1, 1))
    remainders <- substring(words, 2)
    paste0(initials, remainders, collapse = " ")
}
# Remember which items have no label before capitalizing, rather than
# detecting them afterwards through whatever string .simpleCap makes of NA.
label.missing <- is.na(combined[["VarLabel"]])
# vapply() pins the result to one unnamed string per label.
combined[,VarLabel := vapply(tolower(VarLabel),.simpleCap,character(1),USE.NAMES=FALSE)]
# Set VarLabel to variable name if VarLabel is.na.
combined[label.missing,VarLabel := VarNumber]
# Sort on first year in data set.
# sources starts with the first fielded year, so its first four characters
# are that year; substr() is already vectorized over the column.
combined[,first.year := as.numeric(substr(sources,1,4))]
setkey(combined,first.year)
# Plain data.frame from here on: the plotting code indexes it as combined[i,'col'].
combined <- as.data.frame(combined)
# Draw the item-by-year map: one horizontal line per item (in the row order
# of combined) with a dot for every survey year in the item's sources string.
# pdf() does not create a missing output directory, so make sure it exists.
dir.create("graphs",showWarnings=FALSE)
pdf("graphs/Cumulative_Map.pdf",width=10,height=8)
par(mar=c(4.1,11.1,0.1,0.1)) # Wide left margin leaves room for the item labels.
n.items <- nrow(combined)
tick.years <- seq(1956,2012,2)
# Empty canvas; ylim is reversed so row 1 is drawn at the top.
plot(x=c(1956,2012),y=c(1,n.items),ylim=c(n.items,1),type='n',axes=FALSE,ann=FALSE)
abline(h=seq_len(n.items),col="lightgray")
# Two-digit year labels, taken by position instead of deleting every "19" or
# "20" substring.
axis(1,at=tick.years,labels=substr(as.character(tick.years),3,4))
title(xlab="Question fielded in survey year")
# One axis() call per item: a single call with all labels would let axis()
# skip labels it considers overlapping.
for (i in seq_len(n.items)) {
  lab <- gsub("R Opinion: |R Placement: ","",combined[i,'VarLabel'])
  lab <- gsub("Strength of Opinion|R Opinion Strength","Strength",lab)
  lab <- gsub(" \\[2 Of 2\\]| \\[1 Of 2\\]","",lab)
  axis(2,at=i,labels=lab,las=2,cex.axis=.5)
  makePoints(yearString=combined[i,'sources'],y=i)
}
dev.off()
cat("Made plot with",n.items,"questions.\n")
